from collections import Counter
from math import log

def calcShannonEnt(dataset_labels):
    """Return the Shannon entropy, in bits, of a collection of class labels.

    The entropy is ``-sum(p * log2(p))`` taken over the distinct labels,
    where ``p`` is the fraction of entries carrying that label.

    Args:
        dataset_labels: Any iterable of hashable labels. Sequences as well
            as one-shot iterables (generators, iterators) are accepted.

    Returns:
        The entropy as a float. An empty input yields ``0.0``; an input
        whose entries all share one label also yields ``0.0``.
    """
    labelCounts = Counter(dataset_labels)
    # Take the total from the counts instead of len() so that iterables
    # without a length (generators, iterators) work too.
    numEntries = sum(labelCounts.values())
    shannonEnt = 0.0
    for count in labelCounts.values():
        # float() keeps this a true division even under Python 2 semantics.
        prob = float(count) / numEntries
        # Accumulate by subtraction (rather than negating a sum) so the
        # zero-entropy case is +0.0, not -0.0.
        shannonEnt -= prob * log(prob, 2)
    return shannonEnt


if __name__ == "__main__":
    # Small demo: three 'a', one 'b' and two 'c' labels.
    sample_labels = ['a', 'b', 'c', 'a', 'a', 'c']
    print("calcShannonEnt=%f" % calcShannonEnt(sample_labels))


